setwd("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles")
library(phyloseq)
library(dplyr)
library(viridis)
library(stringr)
library(vegan)
library(RColorBrewer)
library(BiocManager)
BiocManager::install("microbiome")
library(microbiome)
library(microbiomeutilities)
library(ggplot2)
library(knitr)
library(ggpubr)
library(pheatmap)
library(ape)
library(multcomp)
metadata <-read.table("metadata.txt", sep="\t", header = T, row.names = 1, fill = 1)
# Load data
metaxa_genus <- read.delim("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/metaxa_genus.txt")
# Create OTU table
OTU_metaxa <- metaxa_genus[,-1]
# Create tax table
tax_table_metaxa <- data.frame(str_split_fixed(data.frame(metaxa_genus) [,1], ";", 6))
colnames(tax_table_metaxa) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus")
# Combine into phyloseq object
metaxa_PHY <- phyloseq(otu_table(OTU_metaxa, taxa_are_rows=TRUE),
tax_table(as.matrix(tax_table_metaxa)), sample_data(metadata))
# Exclude Unknown
metaxa_PHY <- subset_taxa(metaxa_PHY, !Domain %in% c("Unknown"))
metadata$SSU_counts <- sample_sums(metaxa_PHY)
# Create extra column for Eucaryota, might be interesting to check out later
metaxa_PHY_temp <- subset_taxa(metaxa_PHY, !Domain %in% c("Unknown", "Bacteria", "Archaea", "Mitochondria", "Chloroplast"))
metadata$SSU_eucaryota <- sample_sums(metaxa_PHY_temp)
# With low quality (BFH24_S142, BH63_S118, FH10_S171), do at least this.
metaxa_PHY = subset_samples(metaxa_PHY, name != "BFH24_S142" & name != "BH63_S118" & name != "FH10_S171")
# Feces samples (BH20_S88, BH22_S89, BH24_S90, BH25_S91)
metaxa_PHY_ww = subset_samples(metaxa_PHY, name!="BH20_S88" & name!="BH22_S89" & name!="BH24_S90" & name!="BH25_S91")
# Or alternatively include fecal but exclude suspicious samples
# metaxa_PHY_ast = subset_samples(metaxa_PHY, "BH02_S77" & name != "BH27_S92" & name != "BH30_S94")
# Suspicious samples (very high ARG level)
metaxa_PHY_clean = subset_samples(metaxa_PHY_ww, name != "BH02_S77" & name != "BH27_S92" & name != "BH30_S94")
# Clear taxa that are not present in any of the remaining samples
any(taxa_sums(metaxa_PHY_clean) == 0)
## [1] TRUE
metaxa_PHY_clean <- prune_taxa(taxa_sums(metaxa_PHY_clean) > 0, metaxa_PHY_clean)
any(taxa_sums(metaxa_PHY_clean) == 0)
## [1] FALSE
# Change into relative data
metaxa_rel_PHY <- transform_sample_counts(metaxa_PHY_clean, function(x) x/sum(x))
# Ordinate and plot data
metaxa_rel_PHY_ord <- ordinate(metaxa_rel_PHY, method = "PCoA", distance = "horn")
p1 <- plot_ordination(metaxa_rel_PHY, metaxa_rel_PHY_ord, color = "country", label = "name")
metaxa.p1 <- p1 + scale_color_manual(values=c("#FF333F", "#35E0F5", "#531592")) + geom_point(colour = "black",
pch = 21, size = 2, alpha = 0.5) + stat_ellipse(level = 0.95, linetype = 1) +
theme_minimal() + labs(title = "Metaxa2")
metaxa.p1
# Test significance using pair-wise adonis
metaxa_temp <- subset_samples(metaxa_PHY_clean, (country == "Benin" | country == "Finland"))
metaxa_dist <- vegdist(t(otu_table(metaxa_temp)), dist = "horn")
adonis(metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 1.9647 1.96468 9.2344 0.16719 0.001 ***
## Residuals 46 9.7868 0.21276 0.83281
## Total 47 11.7515 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaxa_temp <- subset_samples(metaxa_PHY_clean, (country == "Burkina Faso" | country == "Finland"))
metaxa_dist <- vegdist(t(otu_table(metaxa_temp)), dist = "horn")
adonis(metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 1.0339 1.03386 4.7505 0.09549 0.001 ***
## Residuals 45 9.7935 0.21763 0.90451
## Total 46 10.8274 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaxa_temp <- subset_samples(metaxa_PHY_clean, (country == "Benin" | country == "Burkina Faso"))
metaxa_dist <- vegdist(t(otu_table(metaxa_temp)), dist = "horn")
adonis(metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 0.8852 0.88524 3.8733 0.04911 0.001 ***
## Residuals 75 17.1409 0.22855 0.95089
## Total 76 18.0262 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Finnish samples seems to harbour more distinct taxonomic dibversity than those of Benin (R2=16.7%, p=0.001) and Burkina Faso (R2=9.6 %, p=0.001). Instead There is little grouping by country between Benin and Burkina Faso.
Benin <- subset_samples(metaxa_PHY_clean, (country == "Benin"))
Benin_dist <- vegdist(t(otu_table(Benin)), dist = "horn")
adonis(Benin_dist ~ location, data = data.frame(sample_data(Benin)), permutations = 99999)
##
## Call:
## adonis(formula = Benin_dist ~ location, data = data.frame(sample_data(Benin)), permutations = 99999)
##
## Permutation: free
## Number of permutations: 99999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## location 4 2.1829 0.54572 2.9063 0.2548 1e-05 ***
## Residuals 34 6.3842 0.18777 0.7452
## Total 38 8.5671 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
BF <- subset_samples(metaxa_PHY_clean, (country == "Burkina Faso"))
BF_dist <- vegdist(t(otu_table(BF)), dist = "horn")
adonis(BF_dist ~ location, data = data.frame(sample_data(BF)), permutations = 99999)
##
## Call:
## adonis(formula = BF_dist ~ location, data = data.frame(sample_data(BF)), permutations = 99999)
##
## Permutation: free
## Number of permutations: 99999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## location 4 2.0796 0.51989 2.6418 0.24255 1e-05 ***
## Residuals 33 6.4943 0.19680 0.75745
## Total 37 8.5738 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Finland <- subset_samples(metaxa_PHY_clean, (country == "Finland"))
Finland_dist <- vegdist(t(otu_table(Finland)), dist = "horn")
adonis(Finland_dist ~ location, data = data.frame(sample_data(Finland)), permutations = 99999)
##
## Call:
## adonis(formula = Finland_dist ~ location, data = data.frame(sample_data(Finland)), permutations = 99999)
##
## Permutation: free
## Number of permutations: 99999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## location 3 0.59080 0.19693 1.5658 0.48439 0.1495
## Residuals 5 0.62888 0.12578 0.51561
## Total 8 1.21968 1.00000
# In Benin the location defines the taxonomic composition at some level (R2=25.5 %, p=1e-05), as well as in Burkina Faso (R2=24.3 %, p=1e-05). In Finland grouping by location is not significant (due to small sample size).
# Modify data in command-line to match sample names in metadata file
## tr '|' ';' <merged_abundance_table.txt > mod_merged_abundance_table.txt
## sed -i 's/_profile//g' mod_merged_abundance_table.txt
## sed -i 's/BFH38-A_S156/BFH38.A_S156/g' mod_merged_abundance_table.txt
## sed -i 's/BFH38-B_S157/BFH38.B_S157/g' mod_merged_abundance_table.txt
## sed -i 's/BH34-A_S98/BH34.A_S98/g' mod_merged_abundance_table.txt
## sed -i 's/BH34-B_S99/BH34.B_S99/g' mod_merged_abundance_table.txt
# Load data
metaphlan <- as.matrix(read.table("mod_merged_abundance_table.txt", fill = 1, header = T, check.names = F))
# Exclude the first column "NCBI_tax_id"
metaphlan <- subset(metaphlan, select = -c(NCBI_tax_id))
# Create OTU table
OTU_metaphlan <- metaphlan[,-1]
# Change values as numeric
class(OTU_metaphlan) <- "numeric"
# Create tax_table
tax_table_metaphlan <- data.frame(str_split_fixed(data.frame(metaphlan) [,1], ";", 7))
colnames(tax_table_metaphlan) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
# Clean "*__"
tax_table_metaphlan <- apply(tax_table_metaphlan, 2, function(y) (gsub(".__", "", y)))
# Combine into phyloseq object
metaphlan_PHY <- phyloseq(otu_table(OTU_metaphlan, taxa_are_rows = TRUE), tax_table(as.matrix(tax_table_metaphlan)), sample_data(metadata))
# Virus
metaphlan_PHY <- subset_taxa(metaphlan_PHY, Kingdom != "Virus")
# With low quality (BFH24_S142, BH63_S118, FH10_S171)
metaphlan_PHY = subset_samples(metaphlan_PHY, name != "BFH24_S142" & name != "BH63_S118" & name != "FH10_S171")
# Feces samples (BH20_S88, BH22_S89, BH24_S90, BH25_S91)
metaphlan_PHY_ww = subset_samples(metaphlan_PHY, name!="BH20_S88" & name!="BH22_S89" & name!="BH24_S90" & name!="BH25_S91")
# Or alternatively include fecal but exclude suspicious samples
# metaphlan_PHY_ast = subset_samples(metaphlan_PHY, "BH02_S77" & name != "BH27_S92" & name != "BH30_S94")
# Suspicious samples (very high ARG level)
metaphlan_PHY_clean = subset_samples(metaphlan_PHY_ww, name != "BH02_S77" & name != "BH27_S92" & name != "BH30_S94")
# Clear taxa that is not present in any of the remaining samples
any(taxa_sums(metaphlan_PHY_clean) == 0)
## [1] TRUE
metaphlan_PHY_clean <- prune_taxa(taxa_sums(metaphlan_PHY_clean) > 0, metaphlan_PHY_clean)
any(taxa_sums(metaphlan_PHY_clean) == 0)
## [1] FALSE
# Ordinate and plot data
metaphlan_PHY_ord <- ordinate(metaphlan_PHY_clean, method = "PCoA", distance = "horn")
p2 <- plot_ordination(metaphlan_PHY_clean, metaphlan_PHY_ord, color = "country", label = "sample_type")
metaphlan.p2 <- p2 + scale_color_manual(values=c("#FF333F", "#35E0F5", "#531592")) +
geom_point(colour = "black", pch = 21, size = 2, alpha = 0.5) + stat_ellipse(level = 0.95, linetype = 1) +
theme_minimal() + labs(title = "Metaphlan3")
metaphlan.p2
# Test significance using pair-wise adonis
metaphlan_temp <- subset_samples(metaphlan_PHY_clean, (country == "Benin" | country == "Finland"))
metaphlan_dist <- vegdist(t(otu_table(metaphlan_temp)), dist = "horn")
adonis(metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 0.6468 0.64678 3.6939 0.07433 0.005 **
## Residuals 46 8.0544 0.17510 0.92567
## Total 47 8.7012 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaphlan_temp <- subset_samples(metaphlan_PHY_clean, (country == "Burkina Faso" | country == "Finland"))
metaphlan_dist <- vegdist(t(otu_table(metaphlan_temp)), dist = "horn")
adonis(metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 0.3728 0.37282 2.5133 0.0529 0.017 *
## Residuals 45 6.6753 0.14834 0.9471
## Total 46 7.0481 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaphlan_temp <- subset_samples(metaphlan_PHY_clean, (country == "Burkina Faso" | country == "Benin"))
metaphlan_dist <- vegdist(t(otu_table(metaphlan_temp)), dist = "horn")
adonis(metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 0.7877 0.78768 4.4682 0.05623 0.001 ***
## Residuals 75 13.2216 0.17629 0.94377
## Total 76 14.0093 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The explanatory variable "country" explains less than 10 % of the grouping in every pair-wise comparison.
# There seems to be a small group of samples differing from other ones. They represent mostly other than ww samples. Let's see if this grouping is significant.
metaphlan_temp <- subset_samples(metaphlan_PHY_clean, (sample_type == "waste water" | sample_type == "soil"))
metaphlan_dist <- vegdist(t(otu_table(metaphlan_temp)), dist = "horn")
adonis(metaphlan_dist ~ sample_type, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaphlan_dist ~ sample_type, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## sample_type 1 0.2356 0.23558 1.4281 0.01893 0.152
## Residuals 74 12.2075 0.16497 0.98107
## Total 75 12.4431 1.00000
metaphlan_temp <- subset_samples(metaphlan_PHY_clean, (sample_type == "waste water" | sample_type == "river water"))
metaphlan_dist <- vegdist(t(otu_table(metaphlan_temp)), dist = "horn")
adonis(metaphlan_dist ~ sample_type, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaphlan_dist ~ sample_type, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## sample_type 1 0.6073 0.60729 3.6779 0.04616 0.002 **
## Residuals 76 12.5493 0.16512 0.95384
## Total 77 13.1566 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pair-wise comparison between ww samples and river water gives a significant (p=0.002) R2 value of 4.6 %.
Benin <- subset_samples(metaphlan_PHY_clean, (country == "Benin"))
Benin_dist <- vegdist(t(otu_table(Benin)), dist = "horn")
adonis(Benin_dist ~ location, data = data.frame(sample_data(Benin)), permutations = 99999)
##
## Call:
## adonis(formula = Benin_dist ~ location, data = data.frame(sample_data(Benin)), permutations = 99999)
##
## Permutation: free
## Number of permutations: 99999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## location 4 2.1667 0.54168 3.5875 0.29679 1e-05 ***
## Residuals 34 5.1337 0.15099 0.70321
## Total 38 7.3004 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
BF <- subset_samples(metaphlan_PHY_clean, (country == "Burkina Faso"))
BF_dist <- vegdist(t(otu_table(BF)), dist = "horn")
adonis(BF_dist ~ location, data = data.frame(sample_data(BF)), permutations = 99999)
##
## Call:
## adonis(formula = BF_dist ~ location, data = data.frame(sample_data(BF)), permutations = 99999)
##
## Permutation: free
## Number of permutations: 99999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## location 4 1.3162 0.32904 2.3579 0.22228 6e-05 ***
## Residuals 33 4.6050 0.13955 0.77772
## Total 37 5.9212 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Finland <- subset_samples(metaphlan_PHY_clean, (country == "Finland"))
Finland_dist <- vegdist(t(otu_table(Finland)), dist = "horn")
adonis(Finland_dist ~ location, data = data.frame(sample_data(Finland)), permutations = 99999)
##
## Call:
## adonis(formula = Finland_dist ~ location, data = data.frame(sample_data(Finland)), permutations = 99999)
##
## Permutation: free
## Number of permutations: 99999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## location 3 0.38042 0.126806 1.6969 0.50449 0.1398
## Residuals 5 0.37364 0.074728 0.49551
## Total 8 0.75406 1.00000
# The results seem similar to those from Metaxa2. Benin R2=29.7 %, p=1e-05; BF R2=22.2 %, p=5e-05; Finland not significant.
# Modify mapping output file "ARG_genemat.txt" in command line to match sample names in metadata file
## sed 's/BFH38-A_S156/BFH38.A_S156/g' ARG_genemat.txt > mod_ARG_genemat.txt
## sed -i 's/BFH38-B_S157/BFH38.B_S157/g' mod_ARG_genemat.txt
## sed -i 's/BH34-A_S98/BH34.A_S98/g' mod_ARG_genemat.txt
## sed -i 's/BH34-B_S99/BH34.B_S99/g' mod_ARG_genemat.txt
ARG_genemat <-as.matrix(read.table("mod_ARG_genemat.txt", header= T, check.names = F, row.names = 1))
# Save counts without gene names
#write.table(ARG_genemat, "~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_counts", row.names=T, sep = "\t", col.names = T)
#ARG_genemat_counts <- read.delim("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_counts", row.names=NULL)
#ARG_genemat_counts$row.names<-NULL
# Modify "resfinder.fasta" file so that only hits remain
## seqkit grep -f ARG_genes.txt resfinder.fasta > filtered_resfinder.fasta
# Check if there are correct number of lines
## grep ">" filtered_resfinder.fasta | wc -l
# Print out the gene lengths of these genes into file "lengths_resfinder.txt"
## cat filtered_resfinder.fasta | awk '$0 ~ ">" {if (NR > 1) {print c;} c=0;printf substr($0,2,100) "\t"; } $0 !~ ">" {c+=length($0);} END { print c; }' > lengths_resfinder.txt
# Reorder in excel to match with file "ARG_genemat_norm"
#lengths_resfinder <-as.matrix(read.table("lengths_resfinder.txt", header= F, check.names = T, row.names = 1))
#colnames(lengths_resfinder) <- c("Length")
# Divide by ResFinder hit gene lengths
#ARG_length_norm <- ARG_genemat/lengths_resfinder[, 1]
# Divide by SSU counts and normalize to bacterial 16S rRNA length (1550)
#ARG_genemat_norm <- t(t(ARG_length_norm)/metadata$SSU_counts) * 1550
# Check if correct:
#identical(ARG_genemat[2020, 4]/metadata$SSU_counts[4], ARG_genemat_norm[2020, 4])
# Save and load again to exclude row.names
#write.table(ARG_genemat_norm, "~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=T, sep = "\t", col.names = T)
#ARG_genemat_norm <- read.delim("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=NULL)
#ARG_genemat_norm$row.names<-NULL
ARG_genemat_norm <- t(t(ARG_genemat)/metadata$SSU_counts)
# Check if correct:
identical(ARG_genemat[2020, 4]/metadata$SSU_counts[10], ARG_genemat_norm[2020, 4])
## [1] TRUE
# Save and load again to exclude row.names
write.table(ARG_genemat_norm, "~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=T, sep = "\t", col.names = T)
ARG_genemat_norm <- read.delim("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=NULL)
ARG_genemat_norm$row.names<-NULL
# Create tax table
tax_table_resfinder <- read.csv("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/tax_table_resfinder.txt", header=FALSE, sep=";")
colnames(tax_table_resfinder) <- c("Gene", "Class")
# Combine to phyloseq object
resfinder_PHY <- phyloseq(otu_table(ARG_genemat_norm, taxa_are_rows = TRUE), sample_data(metadata),
tax_table(as.matrix(tax_table_resfinder)))
# Have a quick look
plot_bar(otu_table(resfinder_PHY))
# There are the suspicious samples (BH02, BH27, BH30).
# With low quality (BFH24_S142, BH63_S118, FH10_S171)
resfinder_PHY = subset_samples(resfinder_PHY, name != "BFH24_S142" & name != "BH63_S118" & name != "FH10_S171")
# Feces samples (BH20_S88, BH22_S89, BH24_S90, BH25_S91)
resfinder_PHY_ww = subset_samples(resfinder_PHY, name!="BH20_S88" & name!="BH22_S89" & name!="BH24_S90" & name!="BH25_S91")
# Or alternatively include fecal but exclude suspicious samples
# resfinder_PHY_ast = subset_samples(resfinder_PHY, "BH02_S77" & name != "BH27_S92" & name != "BH30_S94")
# Suspicious samples (very high ARG level) (BH02_S77, BH27_S92, BH30_S94)
resfinder_PHY_clean = subset_samples(resfinder_PHY_ww, name != "BH02_S77" & name != "BH27_S92" & name != "BH30_S94")
# Check if there are taxa that are not present in any of the remaining samples
any(taxa_sums(resfinder_PHY_clean) == 0)
## [1] TRUE
resfinder_PHY_clean <- prune_taxa(taxa_sums(resfinder_PHY_clean) > 0, resfinder_PHY_clean)
any(taxa_sums(resfinder_PHY_clean) == 0)
## [1] FALSE
# Ordinate and plot data
resfinder_PHY_ord <- ordinate(resfinder_PHY_clean, method = "PCoA", distance = "horn")
p3 <- plot_ordination(resfinder_PHY_clean, resfinder_PHY_ord, color = "country", label = "name")
resfinder.p3 <- p3 + scale_color_manual(values=c("#FF333F", "#35E0F5", "#531592")) +
geom_point(colour = "black", pch = 21, size = 2, alpha = 0.5) + stat_ellipse(level = 0.95, linetype = 1) +
theme_minimal() + labs(title = "ResFinder")
resfinder.p3
# Test significance between all pairs
resfinder_temp <- subset_samples(resfinder_PHY_clean, (country == "Finland" | country == "Benin"))
resfinder_dist <- vegdist(t(otu_table(resfinder_temp)), dist = "horn")
adonis(resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Call:
## adonis(formula = resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 1.5666 1.56661 5.9906 0.11522 0.001 ***
## Residuals 46 12.0295 0.26151 0.88478
## Total 47 13.5961 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
resfinder_temp <- subset_samples(resfinder_PHY_clean, (country == "Burkina Faso" | country == "Benin"))
resfinder_dist <- vegdist(t(otu_table(resfinder_temp)), dist = "horn")
adonis(resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Call:
## adonis(formula = resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 0.7633 0.76329 3.0538 0.03912 0.002 **
## Residuals 75 18.7461 0.24995 0.96088
## Total 76 19.5094 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
resfinder_temp <- subset_samples(resfinder_PHY_clean, (country == "Finland" | country == "Burkina Faso"))
resfinder_dist <- vegdist(t(otu_table(resfinder_temp)), dist = "horn")
adonis(resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Call:
## adonis(formula = resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 1.4172 1.41723 6.4599 0.12553 0.001 ***
## Residuals 45 9.8725 0.21939 0.87447
## Total 46 11.2897 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Between Benin and Finland, country explains 11.5 % (p=0.001) of the divergence in the resistome. Between BF and Finland the same is 12.6 % (p=0.001).
Benin <- subset_samples(resfinder_PHY_clean, (country == "Benin"))
Benin_dist <- vegdist(t(otu_table(Benin)), dist = "horn")
adonis(Benin_dist ~ location, data = data.frame(sample_data(Benin)), permutations = 99999)
##
## Call:
## adonis(formula = Benin_dist ~ location, data = data.frame(sample_data(Benin)), permutations = 99999)
##
## Permutation: free
## Number of permutations: 99999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## location 4 2.7747 0.69368 3.0722 0.26548 2e-05 ***
## Residuals 34 7.6768 0.22579 0.73452
## Total 38 10.4516 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
BF <- subset_samples(resfinder_PHY_clean, (country == "Burkina Faso"))
BF_dist <- vegdist(t(otu_table(BF)), dist = "horn")
adonis(BF_dist ~ location, data = data.frame(sample_data(BF)), permutations = 99999)
##
## Call:
## adonis(formula = BF_dist ~ location, data = data.frame(sample_data(BF)), permutations = 99999)
##
## Permutation: free
## Number of permutations: 99999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## location 4 1.6955 0.42388 2.1197 0.20441 0.00197 **
## Residuals 33 6.5990 0.19997 0.79559
## Total 37 8.2945 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Finland <- subset_samples(resfinder_PHY_clean, (country == "Finland"))
Finland_dist <- vegdist(t(otu_table(Finland)), dist = "horn")
adonis(Finland_dist ~ location, data = data.frame(sample_data(Finland)), permutations = 99999)
##
## Call:
## adonis(formula = Finland_dist ~ location, data = data.frame(sample_data(Finland)), permutations = 99999)
##
## Permutation: free
## Number of permutations: 99999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## location 3 0.71217 0.23739 1.371 0.45133 0.1393
## Residuals 5 0.86577 0.17315 0.54867
## Total 8 1.57794 1.00000
# In Benin and BF the grouping by location has R2 values like 26.5 % (p=4e-05) and 20.4 % (p=0.00175). In Finland no significant correlation can be proven.
# Take a look at the indexes
metaxa_tab <-microbiome::alpha(metaxa_PHY_clean, index = "all")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(metaxa_tab))
# Create data table
metaxa.meta <- meta(metaxa_PHY_clean)
kable(head(metaxa.meta))
# Add indexes to data table
## Shannon
metaxa.meta$diversity_shannon <- metaxa_tab$diversity_shannon
## Gini Simpson
metaxa.meta$diversity_gini_simpson <- metaxa_tab$diversity_gini_simpson
## Inverse Simpson
metaxa.meta$diversity_inverse_simpson <- metaxa_tab$diversity_inverse_simpson
# Check whether the distribution of the diversity is normal
hist(metaxa.meta$diversity_shannon)
shapiro.test(metaxa.meta$diversity_shannon) # Does not fit the assumption of normal distribution > wilcox test.
qqnorm(metaxa.meta$diversity_shannon)
hist(metaxa.meta$diversity_gini_simpson)
shapiro.test(metaxa.meta$diversity_gini_simpson) # Does not fit the assumption of normal distribution > wilcox test.
qqnorm(metaxa.meta$diversity_gini_simpson)
hist(metaxa.meta$diversity_inverse_simpson)
shapiro.test(metaxa.meta$diversity_inverse_simpson)
qqnorm(metaxa.meta$diversity_inverse_simpson)
# Create a list of pairwise comparisons
div.country <- levels(metaxa.meta$country)
country.pairs <- combn(seq_along(div.country), 2, simplify = FALSE, FUN = function(i)div.country[i])
print(country.pairs)
# For inverse simpson index, data fullfills the assumption for normal distribution. For that t-test will be apllied. For the other too wilcox test will be used to test the significance.
# Shannon
p4 <- ggviolin(metaxa.meta, x = "country", y = "diversity_shannon",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p4 <- p4 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
print(p4)
# Gini Simpson
p5 <- ggviolin(metaxa.meta, x = "country", y = "diversity_gini_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p5 <- p5 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
print(p5)
# Inverse Simpson
p6 <- ggviolin(metaxa.meta, x = "country", y = "diversity_inverse_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p6 <- p6 + stat_compare_means(comparisons = country.pairs, method = "t.test")
print(p6)
# In Finnish samples seems to have significantly lower alpha diversity in taxonomy according to Metaxa2 compared to Benin and Burkina Faso.
# Shannon
## Take a look at the indexes
metaphlan_tab <-microbiome::alpha(metaphlan_PHY_clean, index = "diversity_shannon")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(metaphlan_tab))
## Create data table
metaphlan.meta <- meta(metaphlan_PHY_clean)
kable(head(metaphlan.meta))
## Add indexes to data table
metaphlan.meta$diversity_shannon <- metaphlan_tab$diversity_shannon
# Check whether the distribution of the diversity is normal
hist(metaphlan.meta$diversity_shannon)
shapiro.test(metaphlan.meta$diversity_shannon)
qqnorm(metaphlan.meta$diversity_shannon)
# Gini Simpson
## Take a look at the indexes
metaphlan_tab <-microbiome::alpha(metaphlan_PHY_clean, index = "diversity_gini_simpson")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(metaphlan_tab))
## Add indexes to data table
metaphlan.meta$diversity_gini_simpson <- metaphlan_tab$diversity_gini_simpson
# Check whether the distribution of the diversity is normal
hist(metaphlan.meta$diversity_gini_simpson)
shapiro.test(metaphlan.meta$diversity_gini_simpson)
qqnorm(metaphlan.meta$diversity_gini_simpson)
# Create a list of pairwise comparisons
div.country <- levels(metaphlan.meta$country)
country.pairs <- combn(seq_along(div.country), 2, simplify = FALSE, FUN = function(i)div.country[i])
print(country.pairs)
# Inverse Simpson
## Take a look at the indexes
metaphlan_tab <-microbiome::alpha(metaphlan_PHY_clean, index = "diversity_inverse_simpson")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(metaphlan_tab))
## Add indexes to data table
metaphlan.meta$diversity_inverse_simpson <- metaphlan_tab$diversity_inverse_simpson
# Check whether the distribution of the diversity is normal
hist(metaphlan.meta$diversity_inverse_simpson)
shapiro.test(metaphlan.meta$diversity_inverse_simpson)
qqnorm(metaphlan.meta$diversity_inverse_simpson)
p7 <- ggviolin(metaphlan.meta, x = "country", y = "diversity_shannon",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
metaphlan.p7 <- p7 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
metaphlan.p7
p8 <- ggviolin(metaphlan.meta, x = "country", y = "diversity_gini_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
metaphlan.p8 <- p8 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
metaphlan.p8
p9 <- ggviolin(metaphlan.meta, x = "country", y = "diversity_inverse_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
metaphlan.p9 <- p9 + stat_compare_means(comparisons = country.pairs, method = "t.test")
metaphlan.p9
# According to metaphlan there are no significant differences between the alpha diversities of samples from different countries.
# Shannon
## Take a look at the indexes
resfinder_tab <-microbiome::alpha(resfinder_PHY_clean, index = "diversity_shannon")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(resfinder_tab))
## Create data table
resfinder.meta <- meta(resfinder_PHY_clean)
kable(head(resfinder.meta))
## Add indexes to data table
resfinder.meta$diversity_shannon <- resfinder_tab$diversity_shannon
# Check whether the distribution of the diversity is normal
hist(resfinder.meta$diversity_shannon)
shapiro.test(resfinder.meta$diversity_shannon) # Does not fit the assumption of normal distribution > wilcox test.
qqnorm(resfinder.meta$diversity_shannon)
# Create a list of pairwise comparisons
div.country <- levels(resfinder.meta$country)
country.pairs <- combn(seq_along(div.country), 2, simplify = FALSE, FUN = function(i)div.country[i])
print(country.pairs)
# Gini Simpson
## Take a look at the indexes
resfinder_tab <-microbiome::alpha(resfinder_PHY_clean, index = "diversity_gini_simpson")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(resfinder_tab))
## Add indexes to data table
resfinder.meta$diversity_gini_simpson <- resfinder_tab$diversity_gini_simpson
# Check whether the distribution of the diversity is normal
hist(resfinder.meta$diversity_gini_simpson)
shapiro.test(resfinder.meta$diversity_gini_simpson) # Does not fit the assumption of normal distribution > wilcox test.
qqnorm(resfinder.meta$diversity_gini_simpson)
# Inverse Simpson
## Take a look at the indexes
resfinder_tab <-microbiome::alpha(resfinder_PHY_clean, index = "diversity_inverse_simpson")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(resfinder_tab))
## Add indexes to data table
resfinder.meta$diversity_inverse_simpson <- resfinder_tab$diversity_inverse_simpson
# Check whether the distribution of the diversity is normal
hist(resfinder.meta$diversity_inverse_simpson)
shapiro.test(resfinder.meta$diversity_inverse_simpson)
qqnorm(resfinder.meta$diversity_inverse_simpson)
p10 <- ggviolin(resfinder.meta, x = "country", y = "diversity_shannon",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
resfinder.p10 <- p10 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
resfinder.p10
p11 <- ggviolin(resfinder.meta, x = "country", y = "diversity_gini_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
resfinder.p11 <- p11 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
resfinder.p11
p12 <- ggviolin(resfinder.meta, x = "country", y = "diversity_inverse_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
resfinder.p12 <- p12 + stat_compare_means(comparisons = country.pairs, method = "t.test")
resfinder.p12
# The resistome seems less complex in Finnish samples than in Burkinabe and Beninise.
nb.cols <- 14
mycolors <- colorRampPalette(brewer.pal(11, "Spectral"))(nb.cols)
loc <- plot_richness(resfinder_PHY_clean, measures = c("Shannon", "Simpson"),
color="location", shape = "country")
resfinder.in <- loc + scale_fill_manual(values=mycolors) +
geom_boxplot() +
geom_point(size=3) + theme_minimal() +
theme(axis.text.x = element_text(angle = 90, size =5))
resfinder.in
# Taxonomic composition by Metaxa2 using microbiome package
# Use the unfiltered data this time to see the differences in fecal samples etc
metaxa_rel_PHY_orig <- transform_sample_counts(metaxa_PHY, function(x) x/sum(x))
# Load data
metaxa.com <- metaxa_rel_PHY_orig
taxic_metaxa <- as.data.frame(metaxa.com@tax_table)
# Add OTU ids
taxic_metaxa$OTU <- rownames(taxic_metaxa)
colnames(taxic_metaxa)
## [1] "Domain" "Phylum" "Class" "Order" "Family" "Genus" "OTU"
# Convert into a matrix.
taxmat_metaxa <- as.matrix(taxic_metaxa)
# Convert into phyloseq compatible file.
new.tax_metaxa <- tax_table(taxmat_metaxa)
# Incroporate into new phyloseq object
tax_table(metaxa.com) <- new.tax_metaxa
# Class level
metaxa_top5class <- aggregate_top_taxa(metaxa.com, top = 5, "Class")
metaxa_top5class.p13 <- plot_composition(metaxa_top5class, group_by = "country") +
theme(legend.position = "bottom") + theme_classic() +
theme(axis.text.x = element_text(angle = 90, size = 8), axis.title = element_blank()) +
ggtitle("Relative abundance of top 5 classes, Metaxa2")
metaxa_top5class.p13 +
scale_fill_manual(values = c("#1B9E77","#E6AB02","#7570B3","#E7298A","#66A61E", "#FFFFFF"))
# Technicval replicates seem highly similar to each other. Fecal samples have different diversity in the class taxonomy: more Closridia and Betaproteobacteria than Gammaproteobacteria and no Epsilonproteobacteria when comparing to ww samples. Of the suscpicious samples especially in BH02 the Gammaproteobacteria are dominating. This would make sense if the sample was from CLED plate which is selective for Enterobacteriaceae species.
metaxa_PHY_genus <- tax_glom(metaxa_rel_PHY, "Genus")
metaxa_PHY_genus_abun <- prune_taxa(names(sort(taxa_sums(metaxa_PHY_genus), TRUE)[1:15]),
metaxa_PHY_genus)
# Transform_1
metaxa_PHY_genus_abun_trans <- transform_sample_counts(metaxa_PHY_genus_abun, function(x) x / sum(x))
# Merge by location
metaxa_mrg_loc <- merge_samples(metaxa_PHY_genus_abun_trans, "location")
# Transform_2
metaxa_mrg_loc_trans <- transform_sample_counts(metaxa_mrg_loc, function(x) x / sum(x))
# Normalize by sample number / location
otu_table(metaxa_mrg_loc_trans) <- otu_table(metaxa_mrg_loc_trans)[, ]/as.matrix(table(sample_data(metaxa_PHY)$location))[,
1]
# Set colors
nb.cols <- 15
mycolors <- colorRampPalette(brewer.pal(11, "Spectral"))(nb.cols)
# Facet labs
country.labs <- c("1" = "Benin", "2" = "Burkina Faso", "3" = "Finland")
# Plot
metaxa.p14 <- plot_bar(metaxa_mrg_loc_trans, fill = "Genus", title = "15 most abundant genera, Metaxa2") +
facet_grid(~country, scales = "free_x", space = "free_x", labeller = labeller(country = country.labs)) +
theme_classic() +
theme(axis.text.x = element_text(angle=55, hjust=1, size = 10),
axis.title.x = element_blank()) +
ylab("Merged relative abundance (%)") + xlab("") +
scale_fill_manual(values=mycolors) + geom_bar(stat="identity") +
guides(fill = guide_legend(label.theme = element_text(size = 8), keywidth = 1.5, keyheight = 1.5, title = "Genus"))
metaphlan_PHY_genus <- tax_glom(metaphlan_PHY_clean, "Genus")
metaphlan_PHY_genus_abun <- prune_taxa(names(sort(taxa_sums(metaphlan_PHY_genus), TRUE)[1:15]),
metaphlan_PHY_genus)
# Transform_1
metaphlan_PHY_genus_abun_trans <- transform_sample_counts(metaphlan_PHY_genus_abun, function(x) x / sum(x))
# Merge by location
metaphlan_mrg_loc <- merge_samples(metaphlan_PHY_genus_abun_trans, "location")
# Transform_2
metaphlan_mrg_loc_trans <- transform_sample_counts(metaphlan_mrg_loc, function(x) x / sum(x))
# Normalize by sample number / location
otu_table(metaphlan_mrg_loc_trans) <- otu_table(metaphlan_mrg_loc_trans)[, ]/as.matrix(table(sample_data(metaphlan_PHY)$location))[,
1]
# Set colors
nb.cols <- 15
mycolors <- colorRampPalette(brewer.pal(11, "Spectral"))(nb.cols)
# Facet labs
country.labs <- c("1" = "Benin", "2" = "Burkina Faso", "3" = "Finland")
# Plot
metaphlan.p16 <- plot_bar(metaphlan_mrg_loc_trans, fill = "Genus", title = "15 most abundant genera, Metaphlan3") +
facet_grid(~country, scales = "free_x", space = "free_x", labeller = labeller(country = country.labs)) +
theme_classic() +
theme(axis.text.x = element_text(angle=55, hjust=1, size = 10),
axis.title.x = element_blank()) +
ylab("Relative abundance (%)") + xlab("") +
scale_fill_manual(values=mycolors) + geom_bar(stat="identity") +
guides(fill = guide_legend(label.theme = element_text(size = 8), keywidth = 1.5, keyheight = 1.5, title = "Genus"))
# Plot both
metaxa.p14
metaphlan.p16
# The results seem coherent between the two methods. Aeromonas sp are relatively common in Finnish samples.
# Let's use again the unfiltered data
metaphlan_PHY_genus <- tax_glom(metaphlan_PHY, "Genus")
heat.metaphlan <- plot_taxa_heatmap(metaphlan_PHY_genus,
subset.top = 50,
VariableA = "country",
heatcolors = rev(brewer.pal(12, "GnBu")),
transformation = "log10",
cluster_cols = T)
heat.metaphlan
# The plot could be edited to have bigger font size and have more annotations (location, sample type).
# Average 16S counts by location
mean16S_counts <- mean(metadata$SSU_counts, invert = TRUE)
lo_data <- data.frame(SUM = round(mean16S_counts * (sample_sums(otu_table(resfinder_PHY_clean)))),
location = sample_data(resfinder_PHY_clean)$location)
plot(lo_data$SUM~lo_data$location, cex.axis=0.5, srt=45, las=2, xlab=NULL)
# Testing models to support the data
## Linear
model.ln<-lm(SUM~location, data=lo_data)
summary(model.ln)
shapiro.test(lo_data$SUM)
plot(model.ln)
## Warning: not plotting observations with leverage one:
## 69
## Warning: not plotting observations with leverage one:
## 69
# The p value is > 0.05 and differs significantly from normal distribution
## Log
model.logln<-lm(log(SUM)~location, data=lo_data)
summary(model.logln)
shapiro.test(log(lo_data$SUM))
plot(model.logln)
## Warning: not plotting observations with leverage one:
## 69
## Warning: not plotting observations with leverage one:
## 69
# The p value is > 0.05 and differs significantly from normal distribution
## Poisson
model.pois = glm(SUM ~ location , data = lo_data, family = poisson)
summary(model.pois)
plot(model.pois)
## Warning: not plotting observations with leverage one:
## 69
## Warning: not plotting observations with leverage one:
## 69
## Quasipoisson
model.qpois<-glm(SUM~location, data=lo_data, family="quasipoisson")
summary(model.qpois)
plot(model.qpois)
## Warning: not plotting observations with leverage one:
## 69
## Warning: not plotting observations with leverage one:
## 69
## Negative binomial
model.nb <- glm.nb(SUM~location, data=lo_data)
summary(model.nb)
plot(model.nb)
## Warning: not plotting observations with leverage one:
## 69
## Warning: not plotting observations with leverage one:
## 69
# Summarize and compare models
data.frame(linear=coef(model.ln),
loglinear=exp(coef(model.logln)),
poisson=exp(coef(model.pois)),
qpoisson=exp(coef(model.qpois)),
negbin=exp(coef(model.nb)))
# Critical value
qchisq(0.95, df.residual(model.pois))
deviance(model.ln)
deviance(model.logln)
deviance(model.pois)
deviance(model.qpois)
deviance(model.nb)
# None of the models fits the data. However, negative binominal model gets the closest.
# Plot using neg. binom. model
fit <- glm.nb(SUM ~ location, data = lo_data, link = log)
lo_data <- cbind(lo_data, Mean = predict(fit, newdata = lo_data, type = "response"), SE = predict(fit, newdata = lo_data, type = "response", se.fit = T)$se.fit)
nb.cols <- 14
mycolors <- colorRampPalette(brewer.pal(11, "Spectral"))(nb.cols)
ARG.sum.lo <- ggplot(lo_data, aes(x = location, y = Mean)) +
geom_line() + geom_jitter(data = lo_data, aes(x = location, y = SUM, color = location),
size = 2.3, alpha = 0.5, width = 0.3) +
geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), width = 0.6, lwd = 0.6) +
geom_point(size = 0.9) +
scale_fill_manual(values = mycolors) +
theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(y = "Sum abundance/16S",
x = "") + guides(color = FALSE, alpha = FALSE) + labs(title = "ARGs") +
scale_y_continuous(trans = "log10")
ARG.sum.lo
#lo_data <- data.frame(SUM = round(mean16S_counts * (sample_sums(otu_table(resfinder_PHY)))), location = sample_data(resfinder_PHY)$location)
#boxplot(lo_data$SUM)
#boxplot(lo_data$SUM, plot=FALSE)$out
#outliers <- boxplot(lo_data$SUM, plot=FALSE)$out
#print(outliers)
# Where they at
#lo_data[which(lo_data$SUM %in% outliers),]
# Remove
#lo_data <- lo_data[-which(lo_data$SUM %in% outliers),]
#boxplot(lo_data$SUM)
# New plot
#fit <- glm.nb(SUM ~ location, data = lo_data, link = log)
#lo_data <- cbind(lo_data, Mean = predict(fit, newdata = lo_data, type = "response"), SE = predict(fit, newdata =lo_data, type = "response", se.fit = T)$se.fit)
#nb.cols <- 14
#mycolors <- colorRampPalette(brewer.pal(8, "Dark2"))(nb.cols)
#ARG.sum.lo_outl <- ggplot(lo_data, aes(x = location, y = Mean)) +
#scale_fill_manual(values = mycolors) +
#geom_line() + geom_jitter(data = lo_data, aes(x = location, y = SUM, color = location),
#size = 2.3, alpha = 0.5, width = 0.3) +
#geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), width = 0.6, lwd = 0.6) +
#geom_point(size = 0.9) +
#theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
#labs(y = "Sum abundance/16S", x = "") + guides(color = FALSE, alpha = FALSE) + labs(title = "ARGs") + scale_y_log10()
#ARG.sum.lo_outl
lo_data <- data.frame(SUM = round(mean16S_counts * (sample_sums(otu_table(resfinder_PHY_clean)))),
location = sample_data(resfinder_PHY_clean)$location)
# When outliers removed use:
#lo_data[which(lo_data$SUM %in% outliers),]
#lo_data <- lo_data[-which(lo_data$SUM %in% outliers),]
fit <- glm.nb(SUM ~ location, data = lo_data, link = log)
# Tukey's post hoc test
glht.mod <- glht(fit, mcp(location = "Tukey"))
print(summary(glht(glht.mod)))
## Warning in chkdots(...): Argument(s) 'complete' passed to '...' are ignored
## Warning in chkdots(...): Argument(s) 'complete' passed to '...' are ignored
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
# Significant comparisons:
#Clinique SOUKA - Central clinic of Calavi == 0 -3.955 <0.01 **
#Helsinki University Hospital - Central clinic of Calavi == 0 -3.378 0.0399 *
#Hopital de Zone Calavi - Central clinic of Calavi == 0 -4.927 <0.01 ***
#Savalou area - Central clinic of Calavi == 0 -8.032 <0.01 ***
#Source de Vie, Ouagadougou - Central clinic of Calavi == 0 -3.920 <0.01 **
#Savalou area - Central Finland Health Care District == 0 -3.939 <0.01 **
#Savalou area - Clinique SOUKA == 0 -4.638 <0.01 ***
#Savalou area - Helsinki University Hospital == 0 -3.826 <0.01 **
#Savalou area - Hopital de Zone Calavi == 0 -4.394 <0.01 ***
#Yalgado teaching hospital of Ouagadougou - Hopital de Zone Calavi == 0 3.623 0.0171 *
#Savalou area - Hospital Saint Jean == 0 -6.079 <0.01 ***
#Savalou area - Koudougou Hospital == 0 -6.427 <0.01 ***
#Savalou area - Menontin hospital == 0 -6.546 <0.01 ***
#Savalou area - Nanoro Hospital == 0 -3.868 <0.01 **
#Source de Vie, Ouagadougou - Savalou area == 0 3.308 0.0492 *
#Yalgado teaching hospital of Ouagadougou - Savalou area == 0 7.406 <0.01 ***
# Average 16S counts bu location
mean16S_counts <- mean(metadata$SSU_counts, invert = TRUE)
co_data <- data.frame(SUM = round(mean16S_counts * (sample_sums(otu_table(resfinder_PHY_clean)))),
country = sample_data(resfinder_PHY_clean)$country)
plot(co_data$SUM~co_data$country, cex.axis=0.5, srt=45, las=2, xlab=NULL)
# Testing models to support the data
## Linear
model.ln<-lm(SUM~country, data=co_data)
summary(model.ln)
shapiro.test(lo_data$SUM)
plot(model.ln)
# The p value is > 0.05 and differs significantly from normal distribution
## Log
model.logln<-lm(log(SUM)~country, data=co_data)
summary(model.logln)
shapiro.test(log(co_data$SUM))
plot(model.logln)
# Data is not normally distributed.
## Poisson
model.pois = glm(SUM~country , data = co_data, family = poisson)
summary(model.pois)
plot(model.pois)
## Quasipoisson
model.qpois<-glm(SUM~country, data=co_data, family="quasipoisson")
summary(model.qpois)
plot(model.qpois)
## Negative binomial
model.nb <- glm.nb(SUM~country, data=co_data)
summary(model.nb)
plot(model.nb)
1/model.nb$theta
-2*(logLik(model.pois)-logLik(model.nb))
# Summarize models
data.frame(linear=coef(model.ln),
loglinear=exp(coef(model.logln)),
poisson=exp(coef(model.pois)),
qpoisson=exp(coef(model.qpois)),
negbin=exp(coef(model.nb)))
# Critical value
qchisq(0.95, df.residual(model.nb))
deviance(model.ln)
deviance(model.logln)
deviance(model.pois)
deviance(model.qpois)
deviance(model.nb)
# None of the models fits the data. However, negative binominal gets the closest.
# Plot using neg. binom. model
co_data <- data.frame(SUM = round(mean16S_counts * (sample_sums(otu_table(resfinder_PHY_clean)))),
country = sample_data(resfinder_PHY_clean)$country)
fit <- glm.nb(SUM ~ country, data = co_data, link = log)
co_data <- cbind(co_data, Mean = predict(fit, newdata = co_data, type = "response"), SE = predict(fit, newdata = co_data, type = "response", se.fit = T)$se.fit)
ARG.sum.co <- ggplot(co_data, aes(x = country, y = Mean)) +
scale_color_manual(values=c("#FF333F", "#35E0F5", "#531592")) +
geom_line() + geom_jitter(data = co_data, aes(x = country, y = SUM, color = country),
size = 2.3, alpha = 0.5, width = 0.3) +
geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), width = 0.6, lwd = 0.6) +
geom_point(size = 0.9) +
theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(y = "Sum abundance/16S",
x = "") + guides(color = FALSE, alpha = FALSE) + labs(title = "ARGs") + scale_y_log10()
ARG.sum.co
#co_data <- data.frame(SUM = round(mean16S_counts * (sample_sums(otu_table(resfinder_PHY)))), country = sample_data(resfinder_PHY)$country)
#boxplot(co_data$SUM)
#boxplot(co_data$SUM, plot=FALSE)$out
#outliers <- boxplot(co_data$SUM, plot=FALSE)$out
#print(outliers)
# Where they at
#co_data[which(co_data$SUM %in% outliers),]
# Remove
#co_data <- co_data[-which(co_data$SUM %in% outliers),]
#boxplot(co_data$SUM)
# New plot
#fit <- glm.nb(SUM ~ country, data = co_data, link = log)
#co_data <- cbind(co_data, Mean = predict(fit, newdata = co_data, type = "response"), SE = predict(fit, newdata = co_data, type = "response", se.fit = T)$se.fit)
#ARG.sum.co_outl <- ggplot(co_data, aes(x = country, y = Mean)) +
#scale_color_manual(values=c("#FF333F", "#35E0F5", "#531592")) +
#geom_line() + geom_jitter(data = co_data, aes(x = country, y = SUM, color = country),
#size = 2.3, alpha = 0.5, width = 0.3) +
#geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), width = 0.6, lwd = 0.6) +
#geom_point(size = 0.9) +
#theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
#labs(y = "Sum abundance/16S", x = "") + guides(color = FALSE, alpha = FALSE) + labs(title = "ARGs") + scale_y_log10()
#ARG.sum.co_outl
co_data <- data.frame(SUM = round(mean16S_counts * (sample_sums(otu_table(resfinder_PHY_clean)))),
country = sample_data(resfinder_PHY_clean)$country)
# Exclude outliers
#co_data[which(co_data$SUM %in% outliers),]
#co_data <- co_data[-which(co_data$SUM %in% outliers),]
fit <- glm.nb(SUM ~ country, data = co_data, link = log)
# Tukey's post hoc test
glht.mod <- glht(fit, mcp(country = "Tukey"))
summary(glht(glht.mod))
## Warning in chkdots(...): Argument(s) 'complete' passed to '...' are ignored
## Warning in chkdots(...): Argument(s) 'complete' passed to '...' are ignored
# No significant results. Maybe there would be if only actual hospital ww samples were included? Then Savalou environment would be excluded with its low sum abundance.
resfinder_PHY_Class <- tax_glom(resfinder_PHY_clean, taxrank = "Class")
# All classes
resfinder_PHY_Class_abund <- prune_taxa(names(sort(taxa_sums(resfinder_PHY_Class), TRUE)),
resfinder_PHY_Class)
# Normalize by number of samples in each country
otu_table(resfinder_PHY_Class_abund) <- (otu_table(resfinder_PHY_Class_abund)[,]/as.matrix(table(sample_data(resfinder_PHY_Class_abund)$country))[, 1])
## Warning in (if (isS4(e1)) e1@.Data else e1)/if (isS4(e2)) e2@.Data else e2:
## longer object length is not a multiple of shorter object length
p17<-plot_bar(resfinder_PHY_Class_abund, x="name", fill="Class")
resfinder.p17 <- p17 + facet_grid(~ location, scales = "free", space = "free") +
ggtitle("Sum relative abundance of ARGs by classes/16S") +
theme_linedraw() +
theme(axis.text.x = element_text(angle=90, hjust=1, size = 8),
axis.title.x = element_blank()) +
guides(fill = guide_legend(label.theme = element_text(size = 8), keywidth = 1.2, keyheight = 1.2))
# Remove OTU separators
resfinder.p17 + geom_bar(aes(color=Class, fill=Class), stat="identity", position="stack")
# Top 12 ARG genes, sum abundance
resfinder_PHY_Gene <- tax_glom(resfinder_PHY_clean, taxrank = "Gene")
resfinder_PHY_Gene_abund <- prune_taxa(names(sort(taxa_sums(resfinder_PHY_Gene), TRUE)[1:12]),
resfinder_PHY_Gene)
# Normalize by number of samples in each country
otu_table(resfinder_PHY_Gene_abund) <- (otu_table(resfinder_PHY_Gene_abund)[,]/as.matrix(table(sample_data(resfinder_PHY_Gene_abund)$country))[, 1])
p18<-plot_bar(resfinder_PHY_Gene_abund, x="name", fill="Gene")
resfinder.p18 <- p18 + facet_grid(~ location, scales = "free", space = "free") +
ggtitle("Sum relative abundances of ARGs/16S") +
theme_linedraw() +
theme(axis.text.x = element_text(angle=90, hjust=1, size = 8),
axis.title.x = element_blank()) +
guides(fill = guide_legend(label.theme = element_text(size = 8), keywidth = 1.2, keyheight = 1.2))
# Remove OTU separators
resfinder.p18 + geom_bar(aes(color=Gene, fill=Gene), stat="identity", position="stack")
# Betalactams
resfinder_PHY_betalactams = subset_taxa(resfinder_PHY_clean, Class=="Betalactam")
resfinder_PHY_betalactams_Gene <- tax_glom(resfinder_PHY_betalactams, "Gene")
# Top12
resfinder_PHY_betalactams_Gene_abund <- prune_taxa(names(sort(taxa_sums(resfinder_PHY_betalactams_Gene), TRUE)[1:12]),
resfinder_PHY_betalactams_Gene)
# Normalize by sample number in each country
otu_table(resfinder_PHY_betalactams_Gene_abund) <- (otu_table(resfinder_PHY_betalactams_Gene_abund)[, ]/as.matrix(table(sample_data(resfinder_PHY_betalactams_Gene_abund)$country))[,
1])
p19<-plot_bar(resfinder_PHY_betalactams_Gene_abund, x="name", fill="Gene")
resfinder.p19 <- p19 + facet_grid(~ location, scales = "free", space = "free") +
ggtitle("Sum of relative abundances of top 12 betalactams/16S") +
theme_linedraw() +
theme(axis.text.x = element_text(angle=90, hjust=1, size = 8),
axis.title.x = element_blank()) +
guides(fill = guide_legend(label.theme = element_text(size = 8), keywidth = 1.2, keyheight = 1.2))
# Remove OTU separators
resfinder.p19 + geom_bar(aes(color=Gene, fill=Gene), stat="identity", position="stack")